<!DOCTYPE html>
<!-- Declare the document language so assistive technology and translation tools can pick the right voice/dictionary. -->
<html lang="en">
<!-- The character encoding is declared before any other head content. -->
<meta charset="utf-8">
<title>Writing a parser - nearley.js - JS Parsing Toolkit</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="nearley.js is a simple, fast, and powerful parser toolkit for JavaScript.">
<style>
/* Page-wide typography defaults; the body carries no margin or padding of its own. */
body {
    margin: 0;
    padding: 0;
    font-family: BlinkMacSystemFont, -apple-system, "Helvetica Neue", "Open Sans", Ubuntu, Arial, Helvetica, sans-serif;
    font-size: 15pt;
    font-weight: 300;
}

/* Links in the article column: visited links keep the same colour as unvisited ones. */
#main a,
#main a:visited {
    color: #55a;
}
/* Declared last so that hovering wins over the visited colour. */
#main a:hover {
    color: #259;
}

/* Site title. */
h1 {
    margin: 0;
    padding: 1em;
    font-size: 1.2em;
    font-weight: normal;
    letter-spacing: 0.2em;
    text-align: center;
}

h1 a {
    text-decoration: none;
    color: #559;
}

h1 a:hover {
    cursor: pointer;
    color: orange;
}

/* Circular orange badge: equal width, height and line-height centre its text. */
#version {
    display: inline-block;
    width: 3em;
    height: 3em;
    line-height: 3em;
    border-radius: 50%;
    background: orange;
    color: white;
    font-weight: bold;
    text-align: center;
    letter-spacing: 0;
}

/* Spacing above section headings. */
h2 {
    margin-top: 2em;
    font-size: 1.5em;
}
h3,
h4 {
    margin-top: 3em;
}

/* Horizontally centred block with 2em gutters on either side. */
.center {
    margin-left: auto;
    margin-right: auto;
    padding-left: 2em;
    padding-right: 2em;
}
/* Caps the width of the article column. */
#main {
    max-width: 35em;
}

/* NOTE(review): only the first selector is scoped to #main; the bare `ul` and
   `ol` selectors match lists anywhere on the page. Confirm this is intended
   before tightening it. */
#main p,
ul,
ol {
    margin-bottom: 1em;
    line-height: 1.5em;
}

#main li {
    margin-top: 1em;
    margin-bottom: 1em;
}

/* Unordered lists in the article drop the bullet and show an em dash instead. */
#main ul li {
    min-width: 10em;
    list-style: none;
}
/* The dash is pulled 1.5em to the left of the item text. */
#main ul li:before {
    content: "\2014";
    display: inline-block;
    position: absolute;
    width: 1.5em;
    margin-left: -1.5em;
}

/* Full-width, fixed-height, non-resizable monospace text area. */
#main textarea {
    width: 100%;
    height: 20em;
    margin-top: 5px;
    margin-bottom: 5px;
    padding: 5px;
    resize: none;

    font-family: menlo, monaco, monospace;
    font-size: 10pt;

    /* Order matters: remove every border first, then restore the left edge. */
    border: none;
    border-left: solid 1px #aaa;
}
#main textarea:focus {
    outline: none;
}

/* Images are centred and never wider than the column. */
#main img {
    display: block;
    max-width: 100%;
    margin-left: auto;
    margin-right: auto;
}
/* Small grey caption immediately following an image. */
img + .caption {
    font-size: 0.7em;
    color: #aaa;
    text-align: center;
}

/* Muted small-print footer, set well apart from the article. */
#footer {
    margin-top: 10em;
    padding: 5em;
    font-size: 0.7em;
    color: #aaa;
}

/* Inline code: light grey pill with a little breathing room. */
code {
    margin-left: 0.2em;
    margin-right: 0.2em;
    padding: 0.2em;
    border-radius: 0.2em;
    background-color: #eee;
}

/* Code blocks: larger padding and radius; long lines scroll instead of wrapping. */
pre code {
    display: block;
    overflow: auto;
    padding: 0.5em;
    border-radius: 0.5em;
}


/* Ordered lists use a CSS counter rendered as a numbered disc, not the native marker. */
ol {
    position: relative; /* positioning context for the absolutely placed discs */
    counter-reset: tutorial-counter;
}

ol li {
    list-style: none;
}

ol li:before {
    counter-increment: tutorial-counter;
    content: counter(tutorial-counter);

    /* Equal width, height and line-height plus a 50% radius give a circle. */
    display: inline-block;
    width: 2em;
    height: 2em;
    line-height: 2em;
    border-radius: 50%;
    background-color: #559;
    color: white;
    font-weight: bold;
    text-align: center;

    position: absolute;
    left: 1em;
    margin-left: -2em;
}

/* Borderless monospace text inputs. */
input[type="text"] {
    border: none;
    color: #559;
    font-family: monospace;
    font-size: 1em;
}

input[type="text"]:focus {
    outline: none;
}


/* Navigation bar in its default layout: a panel with a rule along its bottom edge. */
nav {
    background: #fafafa;
    border-bottom: 2px solid #559;
}
/* Navigation lists carry no margin or padding. */
nav ul {
    margin: 0;
    padding: 0;
}
/* Navigation links fill the whole row; overlong labels are cut off with an ellipsis. */
nav ul a {
    text-decoration: none;
    color: #334;
    display: block;
    overflow: hidden;
    text-overflow: ellipsis;
    padding: 0.25em 0.5em;
}
/* Top-level navigation entry (one per documentation page). */
.page {
    display: block;
}

/* Highlight colour for the current page title and the active section link. */
.page-title-active,
.page-heading-active a {
    color: #66f;
}

/* Section entry nested under a page, indented relative to it. */
.page-heading {
    padding-left: 1.5em;
    display: block;
}


/* From 36rem upwards the navigation becomes a fixed, scrollable left sidebar. */
@media screen and (min-width: 36rem) {
    nav {
        position: fixed;
        top: 0;
        bottom: 0;
        left: 0;
        width: 16rem;
        overflow-y: auto;
        white-space: nowrap;
        font-size: 1rem;
        /* The separator moves from the bottom edge to the right edge. */
        border-bottom: none;
        border-right: 2px solid #559;
    }
    /* Shift the page content right by the sidebar's width. */
    body.docs-page {
        margin-left: 16rem;
    }
}

/* highlight.js */
/* Minified colour rules for the .hljs container and the .hljs-* token classes.
   Kept on one line as shipped; replace it wholesale rather than editing by hand. */

.hljs{display:block;overflow-x:auto;padding:0.5em;background:#F0F0F0}.hljs,.hljs-subst{color:#444}.hljs-comment{color:#888888}.hljs-keyword,.hljs-attribute,.hljs-selector-tag,.hljs-meta-keyword,.hljs-doctag,.hljs-name{font-weight:bold}.hljs-type,.hljs-string,.hljs-number,.hljs-selector-id,.hljs-selector-class,.hljs-quote,.hljs-template-tag,.hljs-deletion{color:#880000}.hljs-title,.hljs-section{color:#880000;font-weight:bold}.hljs-regexp,.hljs-symbol,.hljs-variable,.hljs-template-variable,.hljs-link,.hljs-selector-attr,.hljs-selector-pseudo{color:#BC6060}.hljs-literal{color:#78A960}.hljs-built_in,.hljs-bullet,.hljs-code,.hljs-addition{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta-string{color:#4d99bf}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold}


</style>

<body class="docs-page">

<!-- Corner badge linking to the repository. The link contains only an icon, so it gets an explicit accessible name and the SVG itself is hidden from assistive technology. -->
<a href="https://github.com/kach/nearley" class="github-corner" aria-label="View source on GitHub"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
<!-- Site navigation: one entry per documentation page. The current page also lists
     its sections; the scroll script further down toggles "page-heading-active" on
     those section items, so their class names and #fragment hrefs must stay in sync
     with the <h3> ids in the article. -->
<nav id="nav">
    <h1><a href="/">nearley<span style="color: #559;">.js</span><span id="version">2.20.1</span></a></h1>
    <ul>
        <li class="page"><a href="/docs/index">Home</a></li>
        <li class="page"><a href="/docs/getting-started">Getting started</a></li>
        <li class="page">
            <!-- aria-current exposes "you are here" to assistive technology, not only through colour. -->
            <a href="#" class="page-title-active" aria-current="page">Writing a parser</a>
            <ul>
                <li class="page-heading"><a href="#vocabulary">Vocabulary</a></li>
                <li class="page-heading"><a href="#postprocessors">Postprocessors</a></li>
                <li class="page-heading"><a href="#more-syntax-tips-and-tricks">More syntax: tips and tricks</a></li>
                <li class="page-heading"><a href="#macros">Macros</a></li>
                <li class="page-heading"><a href="#additional-js">Additional JS</a></li>
                <li class="page-heading"><a href="#importing-other-grammars">Importing other grammars</a></li>
                <li class="page-heading"><a href="#whats-next">What’s next?</a></li>
            </ul>
        </li>
        <li class="page"><a href="/docs/parser">Using a parser</a></li>
        <li class="page"><a href="/docs/tokenizers">Tokenizers</a></li>
        <li class="page"><a href="/docs/tooling">Tooling</a></li>
        <li class="page"><a href="/docs/how-to-grammar-good">How to grammar good</a></li>
        <li class="page"><a href="/docs/using-in-frontend">Compiling in browsers</a></li>
        <li class="page"><a href="/docs/glossary">Glossary</a></li>
    </ul>
</nav>

<div id="main" class="center">

<h2>Writing a parser</h2>
<p>This section describes the nearley grammar language, in which you can describe
grammars for nearley to parse. Grammars are conventionally kept in <code>.ne</code> files.
You can then use <code>nearleyc</code> to compile your <code>.ne</code> grammars to JavaScript
modules.</p>
<p>You can find many examples of nearley grammars online, as well as some in the
<code>examples/</code> directory of the <a href="https://github.com/kach/nearley">GitHub
repository</a>.</p>
<h3 id="vocabulary">Vocabulary</h3>
<ul>
<li>A <em>terminal</em> is a single, constant string or a token. For example, the
keyword <code>&quot;if&quot;</code> is a terminal.</li>
<li>A <em>nonterminal</em> describes a set of possible strings. For example, all “if”
statements can be described by a single nonterminal whose value depends on
the condition and body of the if statement.</li>
<li>A <em>rule</em> (or production rule) is a definition of a nonterminal. For example,<pre><code class="language-ne">ifStatement -&gt; &quot;if&quot; condition &quot;then&quot; statement &quot;endif&quot;</code></pre>
is the rule according to which the if statement nonterminal, <code>ifStatement</code>,
is parsed. It depends on the nonterminals <code>condition</code> and <code>statement</code>. A
nonterminal can be described by multiple rules. For example, we can add a
second rule<pre><code class="language-ne">ifStatement -&gt; &quot;if&quot; condition &quot;then&quot; statement &quot;else&quot; statement &quot;endif&quot;</code></pre>
to support “else” clauses.</li>
</ul>
<p>By default, nearley attempts to parse the first nonterminal defined in the
grammar. In the following grammar, nearley will try to parse input text as an
<code>expression</code>.</p>
<pre><code class="language-ne">expression -&gt; number &quot;+&quot; number
expression -&gt; number &quot;-&quot; number
expression -&gt; number &quot;*&quot; number
expression -&gt; number &quot;/&quot; number
number -&gt; [0-9]:+</code></pre>
<p>You can use the pipe character <code>|</code> to separate alternative rules for a
nonterminal. In the example below, <code>expression</code> has four different rules.</p>
<pre><code class="language-ne">expression -&gt;
    number &quot;+&quot; number
  | number &quot;-&quot; number
  | number &quot;*&quot; number
  | number &quot;/&quot; number</code></pre>
<p>The keyword <code>null</code> stands for the <strong>epsilon rule</strong>, which matches nothing. The
following nonterminal matches zero or more <code>cow</code>s in a row, such as
<code>cowcowcow</code>:</p>
<pre><code class="language-ne">a -&gt; null | a &quot;cow&quot;</code></pre>
<h3 id="postprocessors">Postprocessors</h3>
<p>By default, nearley wraps everything matched by a rule into an array. For
example, when <code>rule -&gt; &quot;tick&quot; &quot;tock&quot;</code> matches the string <code>&quot;ticktock&quot;</code>, it
creates the “parse tree” <code>[&quot;tick&quot;, &quot;tock&quot;]</code>. Most of the time, however, you
need to process that data in some way: for example, you may want to filter out
whitespace, or transform the results into a custom JavaScript object.</p>
<p>For this purpose, each rule can have a <em>postprocessor</em>: a JavaScript function
that transforms the array and returns a “processed” version of the result.
Postprocessors are wrapped in <code>{% %}</code>s:</p>
<pre><code class="language-ne">expression -&gt; number &quot;+&quot; number {%
    function(data) {
        return {
            operator: &quot;sum&quot;,
            leftOperand:  data[0],
            rightOperand: data[2] // data[1] is &quot;+&quot;
        };
    }
%}
number -&gt; [0-9]:+ {% d =&gt; parseInt(d[0].join(&quot;&quot;)) %}</code></pre>
<p>The rules above will parse the string <code>5+10</code> into <code>{ operator: &quot;sum&quot;,
leftOperand: 5, rightOperand: 10 }</code>.</p>
<p>The postprocessor can be any function with signature <code>function(data, location,
reject)</code>. Here,</p>
<ul>
<li><p><code>data: Array</code> is an array that contains the results of parsing each part of
the rule. Note that it is still an array, even if the rule only has one part!
You can use the built-in <code>{% id %}</code> postprocessor to convert a one-item array
into the item itself.</p>
<p>For <strong>arrow function</strong> users, a convenient pattern is to decompose the <code>data</code>
array within the argument of the arrow function:</p>
<pre><code class="language-ne">expression -&gt;
    number &quot;+&quot; number {% ([fst, _, snd]) =&gt; fst + snd %}
  | number &quot;-&quot; number {% ([fst, _, snd]) =&gt; fst - snd %}
  | number &quot;*&quot; number {% ([fst, _, snd]) =&gt; fst * snd %}
  | number &quot;/&quot; number {% ([fst, _, snd]) =&gt; fst / snd %}</code></pre>
</li>
<li><p><code>location: number</code> is the index (zero-based) at which the rule match starts.
You might use this to show the location of an expression in an error message.</p>
<blockquote>
<p>Note: Many <a href="tokenizers">tokenizers</a> provide line, column, and offset
information in the Token object. If you are using a tokenizer, then it is
better to use that information than the nearley-provided variable, which
would only tell you that it saw the nth <em>token</em> rather than the nth
<em>character</em> in the string.</p>
</blockquote>
</li>
<li><p><code>reject: Object</code> is a unique object that you can return to signal that this
rule doesn’t <em>actually</em> match its input.</p>
<p>Reject is used in some edge cases. For example, suppose you want sequences of
letters to match variables, except for the keyword <code>if</code>. In this case, your
rule may be</p>
<pre><code class="language-ne">variable -&gt; [a-z]:+ {%
    function(d,l, reject) {
        const name = d[0].join(&#39;&#39;);
        if (name === &#39;if&#39;) {
            return reject;
        } else {
            return { name };
        }
    }
%}</code></pre>
<blockquote>
<p>Warning: Grammars using <code>reject</code> are not context-free, and are often much
slower to parse. So, we encourage you not to use <code>reject</code> unless absolutely
necessary. You can usually use a tokenizer instead.</p>
</blockquote>
</li>
</ul>
<p>nearley provides one built-in postprocessor:</p>
<ul>
<li><code>id</code> returns the first element of the <code>data</code> array. This is useful to
extract the content of a single-element array: <code>foo -&gt; bar {% id %}</code></li>
</ul>
<h3 id="more-syntax-tips-and-tricks">More syntax: tips and tricks</h3>
<h4 id="comments">Comments</h4>
<p>Comments are marked with ‘#’. Everything from <code>#</code> to the end of a line is
ignored:</p>
<pre><code class="language-ne">expression -&gt; number &quot;+&quot; number # sum of two numbers</code></pre>
<h4 id="charsets">Charsets</h4>
<p>You can use valid RegExp charsets in a rule (unless you’re using a
<a href="tokenizers">tokenizer</a>):</p>
<pre><code class="language-ne">not_a_letter -&gt; [^a-zA-Z]</code></pre><p>The <code>.</code> character can be used to represent any character.</p>
<h4 id="case-insensitive-string-literals">Case-insensitive string literals</h4>
<p>You can create case-insensitive string literals by adding an <code>i</code> after the
string literal:</p>
<pre><code class="language-ne">cow -&gt; &quot;cow&quot;i # matches CoW, COW, and so on.</code></pre><p>Note that if you are using a lexer, your lexer should use the <code>i</code> flag in its
regexes instead. That is, if you are using a lexer, you should <em>not</em> use the
<code>i</code> suffix in nearley.</p>
<h4 id="ebnf">EBNF</h4>
<p>nearley supports the <code>*</code>, <code>?</code>, and <code>+</code> operators from
<a href="https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form">EBNF</a> as shown:</p>
<pre><code class="language-ne">batman -&gt; &quot;na&quot;:* &quot;batman&quot; # nananana...nanabatman</code></pre>
<p>You can also use capture groups with parentheses. Their contents can be anything
that a rule can have:</p>
<pre><code class="language-ne">banana -&gt; &quot;ba&quot; (&quot;na&quot; {% id %} | &quot;NA&quot; {% id %}):+</code></pre>
<h3 id="macros">Macros</h3>
<p>Macros allow you to create polymorphic rules:</p>
<pre><code class="language-ne"># Matches &quot;&#39;Hello?&#39; &#39;Hello?&#39; &#39;Hello?&#39;&quot;
matchThree[X] -&gt; $X &quot; &quot; $X &quot; &quot; $X
inQuotes[X] -&gt; &quot;&#39;&quot; $X &quot;&#39;&quot;

main -&gt; matchThree[inQuotes[&quot;Hello?&quot;]]</code></pre>
<p>Macros are dynamically scoped, which means they see arguments passed to parent
macros:</p>
<pre><code class="language-ne"># Matches &quot;Cows oink.&quot; and &quot;Cows moo!&quot;
sentence[ANIMAL, PUNCTUATION] -&gt; animalGoes[(&quot;moo&quot; | &quot;oink&quot; | &quot;baa&quot;)] $PUNCTUATION
animalGoes[SOUND] -&gt; $ANIMAL &quot; &quot; $SOUND # uses $ANIMAL from its caller

main -&gt; sentence[&quot;Cows&quot;, (&quot;.&quot; | &quot;!&quot;)]</code></pre>
<p>Macros are expanded at compile time and inserted in places they are used. They
are not “real” rules. Therefore, macros <em>cannot</em> be recursive (<code>nearleyc</code> will
go into an infinite loop trying to expand the macro-loop). They must also be
defined <em>before</em> they are used (except by other macros).</p>
<h3 id="additional-js">Additional JS</h3>
<p>For more intricate postprocessors, or any other functionality you may need, you
can include chunks of JavaScript code between production rules by surrounding
it with <code>@{% ... %}</code>:</p>
<pre><code class="language-ne">@{%
const cowSays = require(&quot;./cow.js&quot;);
%}

cow -&gt; &quot;moo&quot; {% ([moo]) =&gt; cowSays(moo) %}</code></pre>
<p>Note that it doesn’t matter where you add these; they all get hoisted to the
top of the generated code.</p>
<h3 id="importing-other-grammars">Importing other grammars</h3>
<p>You can include the content of other grammar files:</p>
<pre><code class="language-ne">@include &quot;../misc/primitives.ne&quot; # path relative to file being compiled
sum -&gt; number &quot;+&quot; number # uses &quot;number&quot; from the included file</code></pre>
<p>There are some common nonterminals like “integer” and “double-quoted string”
that ship with nearley to help you prototype grammars efficiently. You can
include them using the <code>@builtin</code> directive:</p>
<pre><code class="language-ne">@builtin &quot;number.ne&quot;
main -&gt; int:+</code></pre>
<p>(Note that we mean “efficient” in the sense that you can get them set up very
quickly. The builtins are <em>inefficient</em> in the sense that they make your parser
slower. For a “real” project, you would want to switch to a lexer and implement
these primitives yourself!)</p>
<p>See the <a href="https://github.com/kach/nearley/tree/master/builtin"><code>builtin/</code></a> directory on GitHub for more details. Contributions are
welcome!</p>
<p>Note that including a file imports <em>all</em> of the nonterminals defined in it, as
well as any JS, macros, and configuration options defined there.</p>
<h3 id="whats-next">What’s next?</h3>
<p>Now that you have a grammar, you’re ready to <a href="parser">learn how to use it to build a
parser!</a></p>


    <!-- Licence and maintainer credits. External links use https, and the link that
         opens a new tab carries rel="noopener" so the opened page gets no handle on
         this window. -->
    <div id="footer"><p>nearley is MIT-licensed. It's maintained by <a
    href="https://hardmath123.github.io">Hardmath123</a> (<a
    href="https://github.com/Hardmath123">GitHub</a>). You're welcome to
    leave questions, comments, advice, or ideas as GitHub issues. And
    feel free to fork nearley with your own experiments!<br>
    
    <a href="https://js.org" target="_blank" rel="noopener" title="JS.ORG | JavaScript Community">
    <img src="https://logo.js.org/dark_horz.png" width="102" alt="JS.ORG Logo"></a>
</div>

<!-- highlight.js is fetched over an explicit https URL so the page also works when
     it is not itself served over http(s), e.g. when opened from disk. -->
<script id="highlight-js" async src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script>
// Syntax-highlight every <pre><code> sample once highlight.js is available.
(function() {
    function highlightAll() {
        [].slice.apply(document.querySelectorAll('pre code')).forEach(function(b) {
            hljs.highlightBlock(b)
        })
    }

    // The library is loaded with `async`, so it can finish before this inline
    // script runs. In that case its load event has already fired and a listener
    // attached now would never be called, so highlight right away instead.
    if (window.hljs) {
        highlightAll()
    } else {
        document.querySelector('#highlight-js').addEventListener('load', highlightAll)
    }
})()
</script>


<script>
// Handle of the pending debounce timer; undefined until the first event arrives.
var timeout;

/**
 * Debounces scroll and resize events: every event cancels the pending timer
 * and starts a new one, so scrollEnd runs only once 50 ms have passed without
 * a further event.
 *
 * @param {Event} e - the triggering event (unused)
 */
function onScroll(e) {
    if (timeout) {
        clearTimeout(timeout);
    }
    timeout = setTimeout(scrollEnd, 50);
}

// Both scrolling and resizing the window can change which section is at the top.
['scroll', 'resize'].forEach(function (eventName) {
    window.addEventListener(eventName, onScroll);
});

/**
 * Marks the sidebar entry of the section currently being read as active.
 *
 * Walks the page's <h3> headings from last to first and selects the first one
 * whose top lies less than 60px below the top of the viewport, i.e. the
 * nearest heading the reader has reached. The list item wrapping the sidebar
 * link to that heading receives the "page-heading-active" class, and the
 * previously active item is reset to plain "page-heading".
 *
 * Headings without an id, or without a matching `a[href="#id"]` link, are
 * skipped in favour of the nearest earlier heading that has one. Previously a
 * missing link raised a TypeError after the old highlight had already been
 * cleared, and an id-less heading would have matched the page's own
 * `href="#"` link.
 *
 * If no heading has been reached yet, the current highlight is left untouched.
 */
function scrollEnd() {
  var headings = document.querySelectorAll('h3')
  var offset = 60
  for (var i=headings.length; i--; ) {
    var h3 = headings[i]
    if (h3.offsetTop < window.scrollY + offset) {
      // Resolve the sidebar link first so that nothing is modified when there is none.
      var link = h3.id ? document.querySelector('a[href="#' + h3.id + '"]') : null
      if (!link) continue
      var cur = document.querySelector('.page-heading-active')
      if (cur) cur.className = 'page-heading'
      link.parentNode.className = 'page-heading page-heading-active'
      return
    }
  }
}
</script>

<script>
// Google Analytics (analytics.js) loader, kept in its minified vendor form.
// The wrapper defines a global `ga` function that queues its arguments in
// `ga.q`, records a timestamp in `ga.l`, and inserts an async <script> tag for
// analytics.js before the first existing <script> element. The two calls below
// then register the tracker for property UA-46120535-5 and send a pageview hit.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-46120535-5', 'auto');
ga('send', 'pageview');
</script>

